# R language script to produce Figure 2 in the article:
# 
#   Spillover effects of reminder nudges in complex environments
#   by Alexander K. Koch, Dan Mønster, and Julia Nafziger


# This script uses the groundhog package in order to ensure reproducibility.
# If you wish to run the script without the groundhog package, set the 
# use_groundhog variable below to FALSE.
# WARNING: This may affect the ability of this script to reproduce the results
# in the paper.
#
# This script was tested with R version 4.3.2 (2023-10-31).
#
# Run as: Rscript figure_2_main_results.R

use_groundhog <- TRUE

if (use_groundhog) {
  options(repos = "https://cloud.r-project.org")
  library(groundhog)
  groundhog_day <- "2024-02-25"
  packages <- c("readr", "dplyr", "stringr", "tibble", "ggplot2")
  groundhog.library(packages, groundhog_day)
} else {
  warning("Not using groundhog. Script runs with available versions of packages.")
  library(readr)
  library(dplyr)
  library(stringr)
  library(tibble)
  library(ggplot2)
}

#
# Use results from the regression tables in the SI Appendix to create Fig. 2.
#

#
# If you run this script interactively, you may have to set your path
# so that R can find the files. If run as a script from a command line
# it should not be required. Run as: `Rscript figure_2_main_results.R`
#

path <- "."

#
# Input CSV files (written by the Stata do-files) and the output PDF both
# live in the same Output directory, one level above `path`. Both variables
# keep the trailing slash so that file names can be appended directly.
#
output_dir <- sprintf("%s/../Output/", path)
csv_path <- output_dir
out_path <- output_dir

#
# List of files to read regression tables from
#
csv_file_names <- c(
  "TableS4PanelACI.csv",
  "TableS4PanelBCI.csv",
  "TableS5PanelACI.csv",
  "TableS5PanelBCI.csv",
  "TableS6PanelACI.csv",
  "TableS6PanelBCI.csv",
  "TableS7PanelACI.csv",
  "TableS7PanelBCI.csv"
)

#
# Check that files exist in the expected location.
#
# The pattern is anchored and the dot is escaped so that only the exact file
# names are listed. Unrelated files in the same directory (backups, copies,
# ...) must not make the check fail; only missing required files do.
#
csv_files <- list.files(path = csv_path,
                        pattern = "^TableS[4-7]Panel[AB]CI\\.csv$")

missing_files <- setdiff(csv_file_names, csv_files)

if (length(missing_files) > 0) {
  # Name the missing files so the user can see what has to be produced.
  error_message <- paste0(
    "\n  Required files were not found in '", csv_path, "':\n",
    paste0("      ", missing_files, collapse = "\n"),
    "\n    Perhaps you need to run the Stata do-files first?",
    "\n    Or, perhaps you need to set the 'path' variable in this script?\n"
  )
  stop(error_message, call. = FALSE)
}

#
# Construct vectors of the actions reported on in the different tables
#

# Tables S4 and S6 (Reminder vs. Control) report three groups of actions.
S4act <- c("All actions", "Reminded Actions", "Non-reminded actions")

# Tables S5 and S7 (ManyReminders vs. Reminder) report four groups of actions.
S5act <- c("All actions",
           "Actions reminded in Reminder",
           "Actions reminded in ManyReminders",
           "Never Reminded")

# The "reminders withdrawn" tables use the same action groups as their
# "reminders present" counterparts.
S6act <- S4act
S7act <- S5act

#
# Construct a data frame with metadata for what is in the tables in the CSV
# files. The last column with the vectors of actions will be stored as a list
# of vectors.
#
# One row per CSV file, built column by column:
#   Tables S4/S5 -> reminders present, Tables S6/S7 -> reminders withdrawn
#   Tables S4/S6 -> Reminder vs. Control, S5/S7 -> ManyReminders vs. Reminder
#   Panel A -> Feedback study, Panel B -> No feedback study
csv_metadata <- tibble(
  file_name = sprintf("TableS%dPanel%sCI.csv",
                      rep(4:7, each = 2),
                      rep(c("A", "B"), times = 4)),
  Study = rep(c("Feedback study", "No feedback study"), times = 4),
  Label = rep(rep(c("Reminder vs. Control", "ManyReminders vs. Reminder"),
                  each = 2),
              times = 2),
  Reminders = rep(c("Reminders present", "Reminders withdrawn"), each = 4),
  Actions = list(S4act, S4act, S5act, S5act, S6act, S6act, S7act, S7act)
)

#
# Loop over all csv files and construct the data frame with the data
#
# Rows are collected in lists and bound once at the end, instead of growing
# the data frame on every iteration.
coef_rows <- vector("list", nrow(csv_metadata))

for (n in seq_len(nrow(csv_metadata))) {
  table_file <- paste0(csv_path, csv_metadata$file_name[n])
  table_data <- read_csv(table_file,
                         show_col_types = FALSE,
                         name_repair = "unique_quiet")
  actions <- csv_metadata$Actions[[n]]

  # The code below reads data rows 2 and 3 and one column per action, offset
  # by one. Fail loudly if the table is smaller than that rather than
  # silently producing NA values in the figure.
  if (nrow(table_data) < 3 || ncol(table_data) < length(actions) + 1) {
    stop("Unexpected layout in '", table_file, "': expected at least 3 rows ",
         "and ", length(actions) + 1, " columns, found ", nrow(table_data),
         " rows and ", ncol(table_data), " columns.",
         call. = FALSE)
  }

  file_rows <- vector("list", length(actions))
  for (m in seq_along(actions)) {
    # Column m + 1 belongs to the m-th action: data row 2 holds the
    # coefficient, data row 3 the confidence interval as text.
    table_column <- table_data[[m + 1]]
    beta <- as.numeric(table_column[2])
    # Pull the numbers out of the interval text; the first is used as the
    # lower bound and the second as the upper bound.
    CI <- table_column[3] |>
      str_extract_all("[-0-9\\.]+") |>
      unlist() |>
      as.numeric()
    file_rows[[m]] <- data.frame(
      coef = beta,
      CI_low = CI[1],
      CI_high = CI[2],
      Label = csv_metadata$Label[n],
      Study = csv_metadata$Study[n],
      Reminders = csv_metadata$Reminders[n],
      Actions = actions[m]
    )
  }
  coef_rows[[n]] <- bind_rows(file_rows)
}

coef_table <- bind_rows(coef_rows)

# Some changes are needed to get everything the way it is needed
# for the figures, so some strings are replaced and numbering and
# lettering for the labels are introduced.
#
# Merge Non-reminded actions and Never Reminded into one level
# Not reminded actions
#
# Merge Reminded Actions with Actions reminded in Reminder
#
# Insert a newline after vs. in label

# All relabelling happens in one mutate(). Columns are rewritten in the
# order Actions, Label, Reminders, so the Label step still sees the original
# (unprefixed) values of Reminders.
coef_table <- coef_table |>
  mutate(
    # Roman numerals fix the order of the facet columns.
    Actions = Actions |>
      str_replace("All actions",
                  "I. All actions") |>
      str_replace("Actions reminded in Reminder",
                  "II. Actions reminded in Reminder") |>
      str_replace("Reminded Actions",
                  "II. Actions reminded in Reminder") |>
      str_replace("Actions reminded in ManyReminders",
                  "III. Actions reminded in ManyReminders") |>
      str_replace("Non-reminded actions",
                  "IV. Actions not reminded") |>
      str_replace("Never Reminded",
                  "IV. Actions not reminded"),
    # Letters A/B mark the contrasts with reminders present, C/D the same
    # contrasts with reminders withdrawn. The last step puts "vs." on its
    # own line.
    Label = Label |>
      str_replace("Reminder vs",
                  if_else(Reminders == "Reminders present",
                          "A. Reminder vs",
                          "C. Reminder vs")) |>
      str_replace("ManyReminders vs",
                  if_else(Reminders == "Reminders present",
                          "B. ManyReminders vs",
                          "D. ManyReminders vs")) |>
      str_replace("vs.", "\nvs.\n"),
    # Two-line titles for the facet rows.
    Reminders = Reminders |>
      str_replace("Reminders present",
                  "Immediate effects\nReminders present") |>
      str_replace("Reminders withdrawn",
                  "Intertemporal effects\nReminders withdrawn")
  )

#
# Make sure the factor variables are in the correct order for plotting.
#

# Level orderings, defined once and shared by coef_table and the annotation
# data frame below.
study_levels <- c("Feedback study", "No feedback study")

action_levels <- c("I. All actions",
                   "II. Actions reminded in Reminder",
                   "III. Actions reminded in ManyReminders",
                   "IV. Actions not reminded")

label_levels <- c("A. Reminder \nvs.\n Control",
                  "B. ManyReminders \nvs.\n Reminder",
                  "C. Reminder \nvs.\n Control",
                  "D. ManyReminders \nvs.\n Reminder")

coef_table <- coef_table |>
  mutate(Study = factor(Study, levels = study_levels),
         Actions = factor(Actions, levels = action_levels),
         Label = factor(Label, levels = label_levels))

# Feedback study in black, No feedback study in dark grey.
study_colors <- setNames(c("black", "darkgrey"), study_levels)

# Data frame to add annotation to actions in the category
# Actions not reminded to make it clear the action set depends
# on the contrast between treatments: one row per label A-D, all in the
# "IV. Actions not reminded" column. The factor columns use the same level
# order as coef_table.
extra_annotation <- data.frame(
  Label = factor(label_levels, levels = label_levels),
  Actions = factor(rep("IV. Actions not reminded", times = 4),
                   levels = action_levels),
  Study = rep(study_levels, times = 2),
  Reminders = rep(c("Immediate effects\nReminders present",
                    "Intertemporal effects\nReminders withdrawn"),
                  each = 2),
  extra_label = rep(c("Not reminded\nin Reminder", "Never reminded"),
                    times = 2)
)

#
# Create the plot
#
# Points and error bars share one dodge specification so that each interval
# stays centred on its point estimate.
# NOTE(review): the negative width presumably reverses the order in which the
# two studies are stacked within a row -- confirm against the rendered figure.
study_dodge <- position_dodge(width = -0.5)

# White, borderless strips for the facet titles.
plain_strip <- element_rect(fill = "white", color = "white", linewidth = 0)

effect_plot <- ggplot(coef_table,
                      aes(x = coef, y = Label, colour = Study)) +
  scale_colour_manual(name = "Study",
                      values = study_colors,
                      limits = unique(coef_table$Study),
                      aesthetics = c("colour", "fill")) +
  # Point estimates with horizontal confidence intervals.
  geom_point(position = study_dodge) +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high),
                 height = 0.2,
                 position = study_dodge) +
  # Reference line at a treatment effect of zero.
  geom_vline(xintercept = 0, linetype = "dashed") +
  # Notes in the "Actions not reminded" column, at a fixed x position.
  geom_text(aes(label = extra_label),
            data = extra_annotation,
            x = 0.2,
            colour = "black",
            hjust = "left",
            size = 2.8) +
  labs(x = "Treatment effect", y = "") +
  # Reverse the y axis so the labels read top to bottom.
  scale_y_discrete(limits = rev) +
  theme_bw() +
  theme(legend.position = "bottom",
        strip.background = plain_strip) +
  # Rows: reminders present / withdrawn. Columns: action categories.
  facet_grid(rows = vars(Reminders),
             cols = vars(Actions),
             scales = "free_y",
             labeller = labeller(Actions = label_wrap_gen(width = 22)))

#
# Un-comment line below, if running interactively, and you want to see the plot
# on screen.
#

# plot(effect_plot)

# Size of the saved figure: 2.3 per facet for the grid of 4 action columns by
# 2 reminder rows, in ggsave()'s default units.
facet_size <- 2.3
figure_width <- facet_size * 4
figure_height <- facet_size * 2

# Strip any trailing slash from out_path before joining, so that the file
# name is not preceded by a doubled path separator.
plot_file_name <- file.path(sub("/+$", "", out_path),
                            "figure_2_main_results.pdf")

ggsave(plot_file_name,
       effect_plot,
       width = figure_width,
       height = figure_height)
